#### Code to produce results reported in Appendix  ####
## Author: Jeong Hyun Kim
## Last updated: 12/31/2021


##### Loading the datasets #####
#### Study 1
library(foreign)
library(tidyverse)
library(broom)
library(stargazer)

csid <- read.csv("survey_combined.csv", stringsAsFactors = FALSE)
# Coding variables
csid$gov_support <- ifelse(csid$BQ14=="1", 1, 0)
# Coding young1: those in 20s and 30s
csid$young1 <- ifelse(csid$BQ3 < 40, 1, 0)
# Alternative operationalization of young variable
csid$young2 <- ifelse(csid$BQ3 < 40 & csid$BQ3 > 25, 1, 0)
# Alternative operationalization of young variable
csid$young3 <- ifelse(csid$BQ3 < 39 & csid$BQ3 > 22, 1, 0)  

names(csid)[3] <- "region"
csid$region <- as.factor(csid$region)
names(csid)[4] <- "age"
names(csid)[5] <- "education"
names(csid)[6] <- "occupation"
csid$occupation <- as.factor(csid$occupation)
names(csid)[7] <- "job_status"
table(csid$job_status)

csid[which(is.na(csid$job_status)), "job_status"] <- "other"
csid[which(csid$job_status=="#NULL!"),"job_status"] <- "other" # unemployed; retired; student
csid[which(csid$job_status=="1"),"job_status"] <- "regular"
csid[which(csid$job_status=="2"),"job_status"] <- "temp"
csid$temp_worker <- ifelse(csid$job_status=="temp", 1, 0)
table(csid$temp_worker)
# Full-time/ part-time/ unemployed/other 

names(csid)[8] <- "marital_status"
names(csid)[9] <- "children"
names(csid)[10] <- "religion"
names(csid)[11] <- "income"
names(csid)[12] <- "ideology"
names(csid)[13] <- "partyID"

# For BQ13, 99 is coded as don't know.
csid[which(csid$ideology=="99"),"ideology"] <- NA

#### Loading full sample (including those who didn't pass the manipulation check)
csid.all <- read.csv("survey_combined_all.csv")
csid.all$pass <- NA
csid.all[which(csid.all$T1_pass==1), "pass"] <- 1
csid.all[which(csid.all$T2_pass==1), "pass"] <- 1
csid.all[which(csid.all$T1_pass==0), "pass"] <- 0
csid.all[which(csid.all$T2_pass==0), "pass"] <- 0

# Coding variables
csid.all$gov_support <- ifelse(csid.all$BQ14=="1", 1, 0)


names(csid.all)[3] <- "region"
csid.all$region <- as.factor(csid.all$region)
names(csid.all)[4] <- "age"
names(csid.all)[5] <- "education"
names(csid.all)[6] <- "occupation"
csid.all$occupation <- as.factor(csid.all$occupation)
names(csid.all)[7] <- "job_status"
table(csid.all$job_status)

csid.all[which(is.na(csid.all$job_status)), "job_status"] <- "other"
csid.all[which(csid.all$job_status=="#NULL!"),"job_status"] <- "other" # unemployed; retired; student
csid.all[which(csid.all$job_status=="1"),"job_status"] <- "regular"
csid.all[which(csid.all$job_status=="2"),"job_status"] <- "temp"
csid.all$temp_worker <- ifelse(csid.all$job_status=="temp", 1, 0)
table(csid.all$temp_worker)
# Full-time/ part-time/ unemployed/other 

names(csid.all)[8] <- "marital_status"
names(csid.all)[9] <- "children"
names(csid.all)[10] <- "religion"
names(csid.all)[11] <- "income"
names(csid.all)[12] <- "ideology"
names(csid.all)[13] <- "partyID"
# For BQ13, 99 is coded as don't know.
csid.all[which(csid.all$ideology=="99"),"ideology"] <- NA

csid.all$treatment <- NA
csid.all[!is.na(csid.all$C_M_T1), "treatment"] <- 1
csid.all[!is.na(csid.all$C_M_T2), "treatment"] <- 0


#### Study 2
df <- read.spss("Survey_2021/survey2021.sav", to.data.frame = TRUE, use.value.labels = FALSE)

# Create a variable that denotes respondents who passed manipulation check:
df <- df %>% mutate(T1_pass = case_when(K0 == 1 & K1==1  ~ 1,
                                        K0 == 1 & K1==2 ~ 0),
                    T2_pass = case_when(K0 == 2 & K2 ==1 ~ 1,
                                        K0 == 2 & K2 == 2 ~ 0))

# code the respondents who received "T1" as treated:
df <- df %>% mutate(treated = case_when(K0 == 1 ~ 1,
                                        K0 == 2 ~ 0))

df$young1 <- ifelse(df$BQ3 < 40, 1, 0)
# Alternative operationalization of young variable
df$young2 <- ifelse(df$BQ3 < 40 & df$BQ3 > 25, 1, 0)
# Alternative operationalization of young variable
df$young3 <- ifelse(df$BQ3 < 41 & df$BQ3 > 24, 1, 0) 
df$young4 <- ifelse(df$BQ3 < 35 & df$BQ3 > 19, 1, 0)  # 20-34

df<-  df %>% rename(age = BQ3,
                    education = BQ4,
                    occupation = BQ5,
                    job_status = BQ5_3,
                    region = BQ1,
                    income = BQ12_1,
                    ideology = BQ13,
                    religion = BQ8,
                    marital_status = BQ6,
                    gender_quota_att = K3_1,
                    equal_pay = K3_2,
                    corporate_quota = K3_3
)
df <- df %>% rename(male_status = K6,
                    female_status = K7)

df <- df %>% mutate(children = case_when(ABQ7 == 1 ~ 1,
                                         ABQ7 == 2 ~ 0),
                    econ_bad = ifelse(K9==1| K9==2, 1, 0))
## gender norms
# K5_1: 가사노동 (lower value indicates more related to men)
# K5_2: 사업 (lower value indicates more related to men)
# K5_3: 육아 (lower value indicates more related to men)
# K5_4: 정치 
df <- df %>% mutate(gender_norm_1 = ifelse(K5_1==5| K5_1==6| K5_1==7, 1, 0),
                    gender_norm_2 = ifelse(K5_2==1| K5_2==2 | K5_2 ==3, 1, 0),
                    gender_norm_3 = ifelse(K5_3==5| K5_3==6| K5_3==7, 1, 0),
                    gender_norm_4 = ifelse(K5_4==1| K5_4==2| K5_4==3, 1, 0))

#### Table A1: Descriptive Stats ####
# Panel (A) Study 1
sapply(csid[,c("age", "education", "income", "ideology","gov_support")], summary)
sapply(csid[,c("age", "education", "income", "ideology","gov_support")], function(x) sd(x, na.rm=T))
sapply(csid[,c("age", "education", "income", "ideology","gov_support")], function(x) table(is.na(x)))
# Panel (B) Study 2
sapply(df[,c("BQ2", "age", "education", "income")], summary)
sapply(df[,c("BQ2", "age", "education", "income")], function(x) sd(x, na.rm=T))
sd(df[-which(df$ideology=="99"), "ideology"]); length(which(df$ideology=="99"))
sd(df[-which(df$ideology=="99"), "ideology"]); length(which(df$ideology=="99"))
summary(df[-which(df$ideology=="99"), "ideology"])

#### Table A2: Balance Tests ####
## Study 1
b.mod.1 <- glm(treated ~ age +  education + income + 
                 + ideology + gov_support , family="binomial", csid)
summary(b.mod.1)
nrow(b.mod.1$model)
b.mod.2 <- glm(treatment ~ age +  education + income + 
                 + ideology + gov_support , family="binomial", csid.all)
summary(b.mod.2)
nrow(b.mod.2$model)

## Study 2
df.all <- df
df.men.all <- df.all %>% filter(BQ2=="1")
df.women.all <- df.all %>% filter(BQ2=="2")

# Reduce the sample to those who passed the manipulation check:
df <- df[which(df$T1_pass==1 | df$T2_pass==1), ]
df.men <- df %>% filter(BQ2=="1")
df.women <- df %>% filter(BQ2=="2")

b.mod.3 <- glm(treated ~ BQ2 + age +  education + income + 
                 + ideology, family="binomial", df)
summary(b.mod.3)
nrow(b.mod.3$model)

b.mod.4 <- glm(treated ~ BQ2 + age +  education + income + 
                 + ideology , family="binomial", df.all)

summary(b.mod.4)
nrow(b.mod.4$model)

#### Table A3: Full Sample Analysis ####
csid.all$young1 <- ifelse(csid.all$age < 40, 1, 0)

mod.si.1.1 <- lm(gender_quota_att ~ treated +  as.factor(wave), csid.all)
summary(mod.si.1.1)

mod.si.1.2 <- lm(gender_quota_att ~ treated +  as.factor(wave) +  young1 
                 + region + education + income + as.factor(occupation) 
                 + ideology + as.factor(partyID) 
                 + econ_bad + as.factor(religion) +  as.factor(marital_status) 
                 + as.factor(children), csid.all)
summary(mod.si.1.2)
nrow(mod.si.1.2$model)

mod.si.1.3 <- lm(gender_quota_att ~ treated*young1 +  as.factor(wave) , csid.all)
summary(mod.si.1.3)

mod.si.1.4 <- lm(gender_quota_att ~ treated*young1 +  as.factor(wave) + region
                 + education + income + as.factor(occupation) + ideology + as.factor(partyID)
                 + econ_bad + as.factor(religion) + as.factor(marital_status) 
                 + as.factor(children), csid.all)
summary(mod.si.1.4)

mod.si.1.5 <- lm(as.numeric(gender_quota_att) ~ treated *  young1 , df.men.all) 
summary(mod.si.1.5)
mod.si.1.6 <- lm(as.numeric(gender_quota_att) ~ treated *  young1 +  region
                 + education + income + as.factor(occupation) + ideology
                 + econ_bad + as.factor(religion) + as.factor(marital_status) 
                 + as.factor(children), df.men.all) 
summary(mod.si.1.6)

mod.si.1.7 <- lm(as.numeric(gender_quota_att) ~ treated *  young1 , df.women.all) 
summary(mod.si.1.7)

mod.si.1.8 <- lm(as.numeric(gender_quota_att) ~ treated *  young1 +  region
                 + education + income + as.factor(occupation) + ideology
                 + econ_bad + as.factor(religion) + as.factor(marital_status) 
                 + as.factor(children), df.women.all) 
summary(mod.si.1.8)

#### Table A4: Interaction with Confounding Variables ####
mod.si.4 <- lm(gender_quota_att ~ treated*young1 + treated*econ_bad + treated*education + treated*ideology + age + as.factor(wave), csid)
summary(mod.si.4)
nrow(mod.si.4$model)

##### Table A5 & A6: Different young operationalization ####
# Treatment effect for young2:
mod.si.5.1 <- lm(gender_quota_att ~ treated*young2 +  as.factor(wave) , csid)
mod.si.5.2 <- lm(gender_quota_att ~ treated*young2 , df.men)
mod.si.5.3 <- lm(gender_quota_att ~ treated*young2 , df.women)
# treatment effect for young3:
mod.si.6.1 <- lm(gender_quota_att ~ treated*young3 +  as.factor(wave) , csid)
mod.si.6.2 <- lm(as.numeric(gender_quota_att) ~ treated *  young3 , df.men) 
mod.si.6.3 <- lm(as.numeric(gender_quota_att) ~ treated *  young3 , df.women) 


#### Table A9: Heterogeneous Treatment Effect ####
df <- df %>% mutate(insecure = case_when(K9==1 ~ 4,
                                         K9==2 ~ 3,
                                         K9==3 ~ 2,
                                         K9==4 ~ 1),
                    college = case_when(education==1 ~ 0,
                                        education==2 ~ 0,
                                        education==3 ~ 1,
                                        education==4 ~ 1,
                                        education==5 ~ 1,
                                        education==6 ~ 1))
df.men <- df %>% filter(BQ2=="1")
df.women <- df %>% filter(BQ2=="2")

# job status: 1 - full-time, 2-temporary
model1 <- lm(gender_quota_att ~ treated*young1*as.factor(job_status), df.men)
model2 <- lm(gender_quota_att ~ treated*young1*college, df.men)
model3 <- lm(gender_quota_att ~ treated*young1*insecure, df.men)
model4 <- lm(gender_quota_att ~ treated*young1*as.factor(job_status), df.women)
model5 <- lm(gender_quota_att ~ treated*young1*college, df.women)
model6 <- lm(gender_quota_att ~ treated*young1*insecure, df.women)
stargazer(model1, model2, model3, model4, model5, model6, star.cutoffs=c(0.05, 0.01, 0.001))

#### Section AG: Economic Perceptions by Age Group ####
# For this figure, we are presenting responses of Control group only. 
df <- df[which(df$treated==0),]
df <- df %>% mutate(gender  = ifelse(BQ2=="1", "Men", "Women"),
                    gender= factor(gender, levels=c("Men", "Women")))

p3 <- ggplot(df, aes(reorder(young1, desc(young1)), as.numeric(K8), fill= reorder(young1, desc(young1)))) + 
  stat_summary(fun = mean, geom="bar", width=.75) + 
  stat_summary(fun.data = mean_cl_normal, geom= "errorbar",  width=0.1, alpha=1, size = 1) + 
  theme_bw() + scale_fill_manual(values = c("1" = "grey85", "0" = "grey55"),
                                 labels = c("Young (<40)", "Old (>=40)"),
                                 name = c("")) + 
  theme(axis.text.x = element_blank(), legend.text = element_text(size = 18),
        axis.ticks.x = element_blank(), legend.position = "bottom",
        axis.text.y = element_text(size = 18),axis.title.y = element_text(size = 15)) + xlab("") + ylab("") +
  ggtitle("Success is Easier for Today's Young Generation.") + theme(text = element_text(size=12)) + facet_grid(.~gender) + coord_cartesian(ylim=c(1, 4)) 
p3
